In [43]:
import matplotlib
import pandas as pd
import seaborn
%matplotlib inline
In [336]:
# Load the per-course discipline table. Later cells group it by the
# "school", "program", "course" and "aggregates" columns.
df = pd.read_csv(filepath_or_buffer="disciplines-per-course.csv")
df.head(n=5)
Out[336]:
In [337]:
# Row count for every (school, program, aggregates) combination.
disciplinary_breakdown_by_course = (
    df.groupby(by=["school", "program", "aggregates"])
    .size()
)
disciplinary_breakdown_by_course.head(n=5)
Out[337]:
In [338]:
# Stacked horizontal bars of raw counts: one bar per (school, program),
# one segment per value of the third index level ("aggregates").
ax = (
    disciplinary_breakdown_by_course
    .unstack(level=2)
    .plot(kind="barh", stacked=True, figsize=(11, 8), colormap="Set2")
)
fig = ax.get_figure()
fig.tight_layout()
fig.savefig("raw_discipline_breakdown.pdf")
In [339]:
# Row count for every (course, aggregates) pair.
raw_course_discipline_counts = (
    df.groupby(by=["course", "aggregates"])
    .size()
)
raw_course_discipline_counts.head(n=5)
Out[339]:
In [340]:
# Convert per-course counts into percentages of that course's total.
# The per-course totals come from groupby(level=...).sum() rather than
# Series.sum(level=...), which newer pandas releases no longer accept; this
# also matches how the per-program totals are computed further down.
discipline_proportion_per_course = raw_course_discipline_counts.div(
    raw_course_discipline_counts.groupby(level="course").sum(),
    level="course",
) * 100
In [341]:
# Export as a wide table: one row per course, one column per discipline
# aggregate, with 0 where a course has no entries for an aggregate.
(
    discipline_proportion_per_course
    .unstack(level=1)
    .fillna(value=0)
    .to_csv("discipline_proportion_per_course.csv")
)
In [342]:
# Row count per (school, program, aggregates). This is the same grouping
# used for disciplinary_breakdown_by_course, kept under its own name for
# the per-program analysis.
raw_programs_discipline_counts = (
    df.groupby(by=["school", "program", "aggregates"])
    .size()
)
raw_programs_discipline_counts
Out[342]:
In [343]:
# Total rows per (school, program), used as the denominator for percentages.
total_program_counts = (
    raw_programs_discipline_counts
    .groupby(level=["school", "program"])
    .sum()
)
total_program_counts
Out[343]:
In [344]:
# Wide table of percentages: rows are (school, program), columns are the
# discipline aggregates; each row is divided by that program's total.
proportion_per_program = (
    raw_programs_discipline_counts
    .unstack(level=-1)
    .div(total_program_counts, axis=0)
    .mul(100)
)
In [ ]:
In [347]:
# Stacked horizontal bars of the percentage breakdown per program.
ax = proportion_per_program.plot(
    kind="barh",
    stacked=True,
    figsize=(11, 8),
    colormap="Set3",
)
ax.set_xlim(0, 100)
# Anchor the legend to the right of the axes; the large padding passed to
# tight_layout below leaves room for it.
ax.legend(bbox_to_anchor=(1.31, .85))
fig = ax.get_figure()
fig.tight_layout(pad=12)
# NOTE: a later cell saves an ordered version of this chart to the same file.
fig.savefig("proportion_per_program.pdf")
In [355]:
# Display the program-by-discipline percentage table. Missing values are
# still NaN at this point; the CSV export in the next cell fills them with 0.
proportion_per_program
Out[355]:
In [356]:
# Export the percentage table, writing 0 in place of missing values.
(
    proportion_per_program
    .fillna(value=0)
    .to_csv("proportion_per_program.csv")
)
In [357]:
from scipy.stats import entropy
In [368]:
# One entropy value per program, computed across that program's row of
# discipline percentages (missing values treated as 0).
core_complexity = (
    proportion_per_program
    .fillna(value=0)
    .apply(entropy, axis=1)
)
In [379]:
# Horizontal bar chart of per-program entropy, plotted in ascending order.
# Series.sort(inplace=False) has been removed from pandas; sort_values()
# returns the same sorted copy without touching core_complexity.
ax = core_complexity.sort_values(ascending=True).plot(
    kind="barh",
    figsize=(11, 8),
)
fig = ax.get_figure()
fig.tight_layout()
fig.savefig("complexity_per_program.pdf")
In [380]:
# Display the per-program entropy values (unsorted).
core_complexity
Out[380]:
In [381]:
# Attach the entropy score as a 'diversity' column so the table can be
# ordered by it in the next cell.
# NOTE(review): this mutates proportion_per_program in place. Re-running any
# earlier cell that plots, exports or computes entropy from that frame after
# this point will also pick up the 'diversity' column.
proportion_per_program['diversity'] = core_complexity
In [384]:
# Order programs by their diversity (entropy) score, ascending.
# DataFrame.sort(columns=...) has been removed from pandas; sort_values(by=...)
# is the replacement and likewise returns a new, sorted frame.
proportion_per_program_ordered = proportion_per_program.sort_values(by="diversity")
In [388]:
# Remove the helper 'diversity' column now that the rows are ordered, so only
# the discipline percentages are plotted. Rebinding the name (instead of
# inplace=True) together with errors="ignore" lets the cell be re-run without
# raising KeyError once the column is already gone.
proportion_per_program_ordered = proportion_per_program_ordered.drop(
    "diversity", axis=1, errors="ignore"
)
In [523]:
# Stacked horizontal bars of discipline percentages per program, with the
# programs in the order established by the diversity sort.
ax = proportion_per_program_ordered.plot(
    kind="barh",
    stacked=True,
    figsize=(11, 8),
    colormap="Set3",
)
ax.set_xlim(0, 100)
# Legend sits to the right of the axes; tight_layout(pad=12) leaves room for it.
ax.legend(bbox_to_anchor=(1.31, .85))
fig = ax.get_figure()
# Title typo fixed ("porportions" -> "proportions").
ax.set_title("Disciplinary proportions per Program")
ax.set_xlabel('Percentage')
ax.set_ylabel('School-Program')
fig.tight_layout(pad=12)
#fig.savefig("proportion_per_program.png", dpi=300)
# Overwrites the unordered chart an earlier cell saved under the same name.
fig.savefig("proportion_per_program.pdf")
In [400]:
# Share of each discipline aggregate across all schools and programs, as a
# percentage of the grand total. The divisor is a single number, so plain
# division gives the same result as the level-aware .div() call.
ischool_discipline_proportions = (
    raw_programs_discipline_counts.groupby(level="aggregates").sum()
    / raw_programs_discipline_counts.sum()
    * 100
)
In [503]:
# Display the overall discipline shares in ascending order.
# Series.sort(inplace=False) has been removed from pandas; sort_values()
# returns the sorted copy and leaves the original Series unchanged.
ischool_discipline_proportions.sort_values()
Out[503]:
In [506]:
# Pie chart of the overall discipline shares.
# The original also passed stacked=True. That option applies to bar/area
# charts and has no meaning for a pie; pandas hands unrecognised keywords on
# to matplotlib's pie(), so it is dropped here.
ax = ischool_discipline_proportions.plot(
    kind="pie",
    figsize=(20, 20),
    colormap="Set3",
    label='',          # empty label, so no series name is drawn beside the pie
    fontsize=32,
    autopct='%.1f',    # print each wedge's percentage with one decimal
)
fig = ax.get_figure()
fig.tight_layout(pad=12)
ax.set_title("Interdisciplinarity Across Syllabi", fontsize=36)
fig.savefig("proportion_all_ischools.png")
In [428]:
# Load the cleaned citation table. Later cells read its 'type', 'title'
# and 'journal' columns.
cleaned_citations = pd.read_csv(filepath_or_buffer="cleaned_cites.csv")
In [464]:
# Keep only citations whose type is 'article', then count how often each
# title occurs, most frequent first.
# Series.sort(inplace=False, ...) has been removed from pandas; sort_values()
# is the replacement and returns a new sorted Series.
journal_articles = cleaned_citations[cleaned_citations['type'] == 'article']
top_journal_articles = (
    journal_articles
    .groupby('title')
    .size()
    .sort_values(ascending=False)
)
In [519]:
# Rank/frequency curve for article titles. use_index=False plots against
# position (rank) rather than the title strings.
ax = top_journal_articles.plot(
    kind="line",
    figsize=(11, 4),
    use_index=False,
)
# Restrict the view to the first 100 ranks and counts up to 7.
ax.set_xlim(0, 100)
ax.set_ylim(0, 7)
ax.set_title("Distribution of Popular Articles across Syllabi")
ax.set_xlabel('Article Popularity Ranking')
ax.set_ylabel('Number of Syllabi')
fig = ax.get_figure()
fig.tight_layout()
fig.savefig("popular-articles.pdf")
In [473]:
# Show the 15 most frequently occurring article titles.
top_journal_articles.head(n=15)
Out[473]:
In [524]:
# Count article citations per journal, most cited first.
# Series.sort(inplace=False, ...) has been removed from pandas; sort_values()
# is the replacement and returns a new sorted Series.
top_journal_titles = (
    cleaned_citations[cleaned_citations['type'] == 'article']
    .groupby('journal')
    .size()
    .sort_values(ascending=False)
)
top_journal_titles.head(30)
Out[524]:
In [484]:
# Summary statistics of the per-journal citation counts.
top_journal_titles.describe()
Out[484]:
In [517]:
# Rank/frequency curve for journals. use_index=False plots against position
# (rank) rather than the journal names.
ax = top_journal_titles.plot(
    kind="line",
    figsize=(11, 4),
    use_index=False,
)
# Axis limits are left at matplotlib's defaults here.
#ax.set_xlim(0,100)
#ax.set_ylim(0,7)
ax.set_title("Popularity Distribution of Journal Titles")
ax.set_xlabel('Journal Popularity Ranking')
ax.set_ylabel('Number of Citations')
fig = ax.get_figure()
fig.tight_layout()
fig.savefig("popular-journals.pdf")
In [ ]: